rm(list=ls())
options(scipen=100,digits=10)

library(rgdal)
library(gdalUtils)
library(raster)
library(dplyr)
library(sf)
library(tidyverse)
library(lfe)
library(rgeos)
library(foreign)
library(lubridate)
library(stringi)
library(MatchIt)

## Find blockgroup-well link
setwd("work directory")
well_info=read.csv("well_info.csv")

setwd("work directory/socio_econ_data")
Block_Group=st_read("PA_2010/PA_blck_grp_2010.shp")

well=well_info[c("OBJECTID","LATITUDE","LONGITUDE")]
coordinates(well)=c("LONGITUDE","LATITUDE")
proj4string(well)=CRS("+proj=longlat +ellps=WGS84 +datum=WGS84")
well=st_as_sf(well) 
well=st_transform(well, st_crs(Block_Group))

##Intersect
Block_Group$GISJOIN=paste("G",Block_Group$STATEFP10, "0",Block_Group$COUNTYFP10,
                          "0", Block_Group$TRACTCE10, Block_Group$BLKGRPCE10, sep = "")
Block_Group=Block_Group[c("GISJOIN")]

gg=st_intersects(well,Block_Group)
aa=as.matrix(gg)
gg=as.data.frame(gg)


gg$OBJECTID=well$OBJECTID

gg$Block=Block_Group$GISJOIN[gg$col.id]

gg=gg[3:4]
gg$Block=as.character(gg$Block)
gg_2010=gg
gg_2010$Year=2010

Block_Group=st_read("bg42_d00.shp")

well=well_info[c("OBJECTID","LATITUDE","LONGITUDE")]
coordinates(well)=c("LONGITUDE","LATITUDE")
proj4string(well)=CRS("+proj=longlat +ellps=WGS84 +datum=WGS84")
well=st_as_sf(well) 
Block_Group=st_set_crs(Block_Group, st_crs(well))

##Intersect
Block_Group$Tract_ID=stri_pad_right(as.character(Block_Group$TRACT),6,0)
Block_Group$GISJOIN=paste("G",Block_Group$STATE, "0",Block_Group$COUNTY,
                          "0", Block_Group$Tract_ID, Block_Group$BLKGROUP, sep = "")
Block_Group=Block_Group[c("GISJOIN")]

gg=st_intersects(well,Block_Group)
aa=as.matrix(gg)
gg=as.data.frame(gg)


gg$OBJECTID=well$OBJECTID

gg$Block=Block_Group$GISJOIN[gg$col.id]

gg=gg[3:4]
gg$Block=as.character(gg$Block)
gg_2000=gg
gg_2000$Year=2000

## Census block definition is same for 2010 and 2018
gg_2018=gg_2010
gg_2018$Year=2018

gg=rbind(gg_2000,gg_2010,gg_2018)

setwd("work directory")
load("Main_Data.Rdata")
treated_list=unique(data$OBJECTID[which(data$window==1|data$prod==1)])
control_list=unique(data$OBJECTID[which(!(data$OBJECTID%in%treated_list))])

gg$Group=NA
gg$Group[which(gg$OBJECTID%in%treated_list)]="Treatment Group"
gg$Group[which(gg$OBJECTID%in%control_list)]="Control Group"
gg=subset(gg, !(is.na(gg$Group)))




## Load socio-economic characteristics
setwd("work directory/socio_econ_data")
Pop_2000=read.csv("nhgis0015_ds147_2000_blck_grp.csv")
Pop_2000=Pop_2000[c("GISJOIN","YEAR","FXS001")]
names(Pop_2000)=c("Block","Year","Population")

Income_2000=read.csv("nhgis0015_ds152_2000_blck_grp.csv")
Income_2000=Income_2000[c("GISJOIN","YEAR","HG4001")]
names(Income_2000)=c("Block","Year","Income_Per_Capita")

Educ_2000=read.csv("nhgis0015_ds152_2000_blck_grp.csv")
Educ_2000$all_HighSchool_Above=(rowSums(Educ_2000[,22:29]) + rowSums(Educ_2000[,38:45]))/ rowSums(Educ_2000[,14:45])
Educ_2000=Educ_2000[c("GISJOIN","YEAR","all_HighSchool_Above")]
names(Educ_2000)[1:2]=c("Block","Year")

House_Median_2000=read.csv("nhgis0015_ds152_2000_blck_grp.csv")
House_Median_2000=House_Median_2000[c("GISJOIN","YEAR","G8V001")]
names(House_Median_2000)=c("Block","Year","Median_House_Value")
House_Median_2000$Year=2000

Pop_2010=read.csv("nhgis0015_ds172_2010_blck_grp.csv")
Pop_2010=Pop_2010[c("GISJOIN","YEAR","H7V001")]
names(Pop_2010)=c("Block","Year","Population")

Income_2010=read.csv("nhgis0015_ds176_20105_2010_blck_grp.csv")
Income_2010=Income_2010[c("GISJOIN","YEAR","JQBE001")]
names(Income_2010)=c("Block","Year","Income_Per_Capita")
Income_2010$Year=2010

Educ_2010=read.csv("nhgis0015_ds176_20105_2010_blck_grp.csv")
Educ_2010$all_HighSchool_Above=(rowSums(Educ_2010[,47:54]) + rowSums(Educ_2010[,64:71]))/(Educ_2010$JN9E001)
Educ_2010=Educ_2010[c("GISJOIN","YEAR","all_HighSchool_Above")]
names(Educ_2010)[1:2]=c("Block","Year")
Educ_2010$Year=2010

House_Median_2010=read.csv("nhgis0015_ds176_20105_2010_blck_grp.csv")
House_Median_2010=House_Median_2010[c("GISJOIN","YEAR","JTIE001")]
names(House_Median_2010)=c("Block","Year","Median_House_Value")
House_Median_2010$Year=2010

data_2018=read.csv("nhgis0016_ds239_20185_2018_blck_grp.csv")
Pop_2018=data_2018[c("GISJOIN","YEAR","AJYPE001")]
names(Pop_2018)=c("Block","Year","Population")
Pop_2018$Year=2018

Income_2018=data_2018[c("GISJOIN","YEAR","AJ0EE001")]
names(Income_2018)=c("Block","Year","Income_Per_Capita")
Income_2018$Year=2018

Educ_2018=data_2018
Educ_2018$all_HighSchool_Above=(rowSums(data_2018[,55:63]))/(data_2018$AJYPE001)
Educ_2018=Educ_2018[c("GISJOIN","YEAR","all_HighSchool_Above")]
names(Educ_2018)[1:2]=c("Block","Year")
Educ_2018$Year=2018

House_Median_2018=data_2018[c("GISJOIN","YEAR","AJ3QM001")]
names(House_Median_2018)=c("Block","Year","Median_House_Value")
House_Median_2018$Year=2018


Pop=rbind(Pop_2000,Pop_2010,Pop_2018)
Pop$Block=as.character(Pop$Block)
Income=rbind(Income_2000,Income_2010,Income_2018)
Income$Block=as.character(Income$Block)
Educ=rbind(Educ_2000,Educ_2010, Educ_2018)
Educ$Block=as.character(Educ$Block)
House_Median=rbind(House_Median_2000,House_Median_2010, House_Median_2018)
House_Median$Block=as.character(House_Median$Block)

gg=left_join(gg, Pop, by=c("Block","Year"))
gg=left_join(gg, Income, by=c("Block","Year"))
gg=left_join(gg, Educ, by=c("Block","Year"))
gg=left_join(gg, House_Median, by=c("Block","Year"))

well_info$spud_year=year(well_info$C_Start_RFF)
well_spud_year=well_info[c("OBJECTID","spud_year")]

gg=subset(gg, Year==2010)

gg$Spud=0
gg$Spud[which(gg$Group=="Treatment Group")]=1

gg=left_join(gg, well_spud_year, by="OBJECTID")
gg$spud_year[which(is.na(gg$spud_year))]=0

re=matchit(Spud ~ factor(spud_year)+Population + Income_Per_Capita + all_HighSchool_Above + Median_House_Value, data = gg, method = "nearest")
aaa=match.data(re)
matched=aaa$OBJECTID


### Run Matched Regression
setwd("working directory")
load("Main_Data.Rdata")

data=subset(data, OBJECTID%in%matched)

DiD_base_density=felm(AOD ~ window + prod +   Treatment_window_count_20km +  Treatment_prod_count_20km + Plug_Count_20KM
                      + PPT + TDMEAN + TMEAN + Wind_Speed | 
                        factor(OBJECTID)+factor(DATE) |0 |clust, data = data)
summary(DiD_base_density)

DiD_spatial_density=felm(AOD ~ window + prod + window_2 + window_5 + window_10
                         + prod_2 + prod_5 + prod_10 + Treatment_window_count_20km +  Treatment_prod_count_20km + Plug_Count_20KM
                         + PPT + TDMEAN + TMEAN + Wind_Speed | 
                           factor(OBJECTID)+factor(DATE) |0 |clust , data = data)
summary(DiD_spatial_density)


